##########################################
# Replication Data for Proksch, Lowe, Wäckerle, Soroka. (2018). Multilingual Sentiment Analysis: A New Approach to Measuring Conflict in Legislative Speeches. Legislative Studies Quarterly, Forthcoming.
##########################################

#Part 2: Irish Dail Debates: Budget Speeches
##########################
#Intro
# Start from an empty global environment (kept from the original replication
# workflow; note that this removes every object in the calling session).
rm(list=ls())
library(readtext)
library(tidyverse)
#install_version("quanteda", version = "1.1.1", repos = "http://cran.us.r-project.org") #all analysis is run on quanteda version 1.1.1
library(quanteda)
library(rstudioapi)

# Resolve the folder of this script so the data file can be loaded by its
# bare name. getActiveDocumentContext() only works inside RStudio, and it
# yields an empty path for an unsaved document; in both cases the working
# directory is left as it is instead of failing in setwd().
current_path <- if (rstudioapi::isAvailable()) {
  getActiveDocumentContext()$path
} else {
  ""
}
if (nzchar(current_path)) {
  setwd(dirname(current_path))
}

# Fail early with an actionable message when the data file is not reachable.
if (!file.exists("2_ireland_budgets.RData")) {
  stop("Cannot find '2_ireland_budgets.RData' in ", getwd(),
       "; set the working directory to the folder containing this script.",
       call. = FALSE)
}
# Loads the objects used below (the code that follows reads `corp.ireland`).
load("2_ireland_budgets.RData")

##########################
#Run Sentiment and Wordfish

# Apply the LSD2015 dictionary with texts aggregated by `Party_Year`.
# English stopwords, punctuation and numbers are removed before counting.
senti_ireland_debates <- data.frame(
  dfm(corp.ireland,
      groups = "Party_Year",
      remove = stopwords("english"),
      remove_punct = TRUE,
      remove_numbers = TRUE,
      dictionary = data_dictionary_LSD2015)
)
senti_ireland_debates <- rename(senti_ireland_debates, Party_Year = document)

# Sentiment is the log ratio of positive to negative dictionary counts;
# 0.5 is added to both counts so that a zero count keeps the ratio finite.
senti_ireland_debates$Sentiment <- log(
  (senti_ireland_debates$positive + 0.5) /
    (senti_ireland_debates$negative + 0.5)
)

# Document-feature matrix for Wordfish, aggregated the same way but without
# the dictionary lookup.
dfm_ireland_debates <- dfm(corp.ireland,
                           groups = "Party_Year",
                           remove = stopwords("english"),
                           remove_punct = TRUE,
                           remove_numbers = TRUE)

# Run the Wordfish model.
# NOTE(review): dir = c(10, 1) refers to document positions in the grouped
# dfm; confirm which party-years these indices correspond to.
wf_ireland_debates <- textmodel_wordfish(dfm_ireland_debates, dir = c(10, 1))

# Second element of the model summary, stored as two columns (position and
# standard error) with the document names as row names.
# NOTE(review): presumably the estimated document positions under
# quanteda 1.1.1 -- confirm if the package version changes.
sum_wf_ireland_debates <- data.frame(summary(wf_ireland_debates)[2])
names(sum_wf_ireland_debates) <- c("wf.theta", "wf.se")
sum_wf_ireland_debates$Party_Year <- row.names(sum_wf_ireland_debates)

# Join Wordfish and sentiment estimates. The key is named explicitly so the
# join cannot silently pick up other shared column names.
senti_ireland_debates <- left_join(senti_ireland_debates,
                                   sum_wf_ireland_debates,
                                   by = "Party_Year")

# Split "<Party>_<Year>" into its two components.
info2 <- do.call(rbind, str_split(senti_ireland_debates$Party_Year, "_"))
senti_ireland_debates$Party <- info2[, 1]
senti_ireland_debates$Year <- info2[, 2]

# Define opposition and government parties: FF, Green and PD count as
# government up to and including 2011, FG and LAB after 2011; everything
# else is opposition.
senti_ireland_debates$gov_oppo <- "Opposition"
senti_ireland_debates$gov_oppo[
  senti_ireland_debates$Party %in% c("FF", "Green", "PD") &
    as.numeric(as.character(senti_ireland_debates$Year)) <= 2011
] <- "Government"
senti_ireland_debates$gov_oppo[
  senti_ireland_debates$Party %in% c("FG", "LAB") &
    as.numeric(as.character(senti_ireland_debates$Year)) > 2011
] <- "Government"

# Correlation between sentiment and Wordfish positions.
cor(senti_ireland_debates$Sentiment, senti_ireland_debates$wf.theta)

# Store the status as a factor with "Opposition" as the reference level.
senti_ireland_debates$gov_oppo <- factor(senti_ireland_debates$gov_oppo)
senti_ireland_debates$gov_oppo <- relevel(senti_ireland_debates$gov_oppo,
                                          "Opposition")

##########################
#plot Wordfish vs Sentiment
# Text scatter of party-year labels: Wordfish position on the x axis,
# sentiment on the y axis, coloured by government/opposition status.
irish_budgetplot <- ggplot(
  senti_ireland_debates,
  aes(x = wf.theta, y = Sentiment, colour = gov_oppo, label = Party_Year)
) +
  geom_text() +
  xlim(-2, 2.5) +
  labs(x = "Estimated Wordfish Position", y = "Sentiment") +
  theme_bw() +
  # Strip grid and panel background, draw plain black axes, enlarge the
  # text, and put an untitled legend below the panel.
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black"),
    axis.text.x = element_text(size = 16),
    axis.text.y = element_text(size = 16),
    axis.title = element_text(size = 18),
    plot.title = element_text(size = 20, hjust = 0.5),
    legend.text = element_text(size = 16),
    legend.title = element_blank(),
    legend.position = "bottom"
  )
# Display the figure.
irish_budgetplot

# Uncertainty of the log-ratio sentiment score: the spread is computed from
# the same 0.5-adjusted positive and negative counts used for the score, and
# the bounds are the score plus/minus 1.96 times that spread.
senti_ireland_debates$Sentiment.sd <- with(
  senti_ireland_debates,
  sqrt(1 / (positive + 0.5) + 1 / (negative + 0.5))
)
senti_ireland_debates$Sentiment.upper <- with(
  senti_ireland_debates,
  Sentiment + 1.96 * Sentiment.sd
)
senti_ireland_debates$Sentiment.lower <- with(
  senti_ireland_debates,
  Sentiment - 1.96 * Sentiment.sd
)

# Year becomes a factor; the dot chart below uses it as grouping variable.
senti_ireland_debates$Year <- factor(senti_ireland_debates$Year)
#plot and save sentiment estimates by year and party

##########################
#Plot Sentiment Positions
# Dot chart of sentiment by party within year, with interval bars drawn on top.
# Sort the rows by ascending sentiment before plotting.
senti_ireland_debates<-senti_ireland_debates[order(senti_ireland_debates$Sentiment),]
# Per-row colour: "#F8766D" for opposition rows, "#00B0F6" for all others.
senti_ireland_debates$col<-ifelse(senti_ireland_debates$gov_oppo=="Opposition","#F8766D","#00B0F6")
# Numeric code per row (1 for opposition, 2 otherwise); not referenced again in this section.
shapes=ifelse(as.character(senti_ireland_debates$gov_oppo)=="Opposition",1,2)
# Base-graphics dot chart: one row per party, grouped by year, x axis limited to [-2, 2].
# NOTE(review): pch is a literal vector of 29 symbols (19 and 15) rather than being
# derived from gov_oppo; it presumably assumes 29 party-year rows in a fixed plotting
# order -- confirm against the data.
dotchart(senti_ireland_debates$Sentiment, 
         groups=senti_ireland_debates$Year,
         labels=senti_ireland_debates$Party,
         col=ifelse(senti_ireland_debates$gov_oppo=="Opposition","#F8766D","#00B0F6"),
         pch=c(19,19,19,19,19,19,15,15,19,19,19,19,15,15,19,19,19,15,15,19,19,19,15,15,19,19,19,15,15),
         xlab="Sentiment",
         xlim=c(-2,2))
# Second copy of the data ordered by year and sentiment, both decreasing.
senti_ireland_debates2<-senti_ireland_debates[order(senti_ireland_debates$Year,senti_ireland_debates$Sentiment,decreasing=T),] 
# Rows 1-8, 9-14, 15-19, 20-24 and 25-29 are each reversed and separated by two
# all-NA rows, giving 37 rows in total.
# NOTE(review): the chunk sizes (8/6/5/5/5) are hard-coded; presumably they are the
# number of parties per year and the NA rows mimic the gaps dotchart() leaves between
# groups, so that row i sits at height i in the chart -- confirm.
senti_ireland_debates2<-rbind(senti_ireland_debates2[1:8,][8:1,],rep(NA,9),rep(NA,9), senti_ireland_debates2[9:14,][6:1,],rep(NA,9),rep(NA,9),senti_ireland_debates2[15:19,][5:1,],rep(NA,9),rep(NA,9),senti_ireland_debates2[20:24,][5:1,],rep(NA,9),rep(NA,9),senti_ireland_debates2[25:29,][5:1,])
# Show the padded table for inspection.
senti_ireland_debates2
# For every row: draw a horizontal segment from Sentiment.lower to Sentiment.upper at
# height i, then redraw the point estimate on top of it in the row's colour.
for (i in 1:nrow(senti_ireland_debates2)){
  lines(x=c(senti_ireland_debates2$Sentiment.lower[i],senti_ireland_debates2$Sentiment.upper[i]), y=c(i,i),lwd=1.5)
  # Rows redrawn with pch 19 (the symbol the legend assigns to "Opposition").
  if(i %in% c(1:6,11:14,19:21,26:28,33:35)){
  points(x=senti_ireland_debates2$Sentiment[i], y=i,col=senti_ireland_debates2$col[i],pch=19,cex=1.2)
  }
  # Rows redrawn with pch 15 (the symbol the legend assigns to "Government").
  # NOTE(review): these index sets are hard-coded (the last two rows of each chunk);
  # they presumably assume the government parties are the two highest-sentiment rows
  # in every year -- confirm against gov_oppo. Rows 9-10, 17-18, 24-25 and 31-32 are
  # the NA padding rows and fall in neither set.
  if(i %in% c(7:8,15:16,22:23,29:30,36:37)){
    points(x=senti_ireland_debates2$Sentiment[i], y=i,col=senti_ireland_debates2$col[i],pch=15,cex=1.2)
  }
 }
# Horizontal legend without a box, anchored at the top and shifted by a negative inset;
# xpd=TRUE allows it to be drawn outside the plot region.
legend("top",inset=c(0,-0.15),xpd=TRUE,legend=c("Opposition","Government"),col=c("#F8766D","#00B0F6"),pch=c(19,15),bty="n",horiz=T)
# Add reference grid lines to the existing plot.
grid()